knitr::opts_chunk$set(
echo = TRUE,
error = FALSE,
message = FALSE,
warning = FALSE
)
library(tidyverse)
library(kableExtra)
library(knitr)
library(plotly)
library(broom)
library(wesanderson)
library(robust)
library(ggparl)# reads excel data sheet
data <- read.csv("../data/study4.csv") # reads data
data_sat <- read.csv("../data/raw/stage3_satisfaction.csv") # reads in satisfaction data
# summary(data) # explore the data types, missing data and typos
data <- data %>%
mutate(gender = ifelse(gender == "m", "m", "f")) # corrects an issuewith extra space in "f "
data$gender <- as.factor(data$gender)
data$pp_number <- as.factor(data$pp_number)
data$condition <-as.factor(data$condition)
# mood data
mood1 <- read.csv("../data/raw/stage1_mood.csv") %>%
select(subject, stimulusitem2, response)%>%
rename(response1=response, feeling = stimulusitem2)%>%
mutate(stage = 1)
mood2 <- read.csv("../data/raw/stage2_mood.csv") %>%
select(subject, stimulusitem2, response)%>%
rename(response2=response, feeling = stimulusitem2)%>%
mutate(stage = 2)
mood3 <- read.csv("../data/raw/stage3_mood.csv") %>%
select(subject, stimulusitem2, response) %>%
rename(response3=response, feeling = stimulusitem2)%>%
mutate(stage = 3)
# qualtrics data
#mainly info about drinking
data_q <- read.csv("../data/other/study_4Q.csv")%>%
select(Q21, gender, age, Q15, Q16)%>%
rename(subject = Q21,
units = Q15,
units_beer = Q16)%>%
mutate(subject = as.numeric(subject),
condition = subject %/% 1000)%>%
filter(!is.na(subject))%>%
mutate(subject = as.factor(subject),
gender=as.factor(gender),
age = as.numeric(age),
units = as.numeric(units),
units_beer = as.numeric(units_beer))%>%
filter( subject != "6666")DA <- data %>%
select(pp_number, BMI)%>%
rename(subject = pp_number)
DB <- data_sat %>%
select(subject, response, trialcode ) %>%
mutate(row = row_number(), subject = as.factor(subject)) %>%
pivot_wider(names_from = trialcode, values_from = response) %>%
rename( wtp = VAS_wtp)%>%
select(- row)
D1 <- inner_join( DA,DB,
by = "subject",
keep = FALSE
) %>%
pivot_longer(cols =c(VAS, wtp) ,
names_to = "measure",
values_to = "rating") %>%
inner_join(., data_q,
by = "subject",
keep = FALSE)%>%
filter(!is.na(rating))
# long dataset incl,. info about satisfaction and willingness to pay by condition, and alcohol/ non-alcohol
# need to add focus condition
#Joining mood data
D2 <- inner_join(mood1, mood2,
by = c("subject", "feeling"),
keep = FALSE) %>%
inner_join(., mood3,
by = c("subject","feeling"),
keep = FALSE)%>%
select(- starts_with("stage")) %>%
mutate(condition = subject %/% 1000) %>%
pivot_longer(cols = starts_with("response"),
names_to = "stage",
names_prefix = "response",
values_to = "rating")# gender
total_n <- nrow(data)
males <- data_q%>%filter(gender == "male") %>% nrow()
females <- data_q%>%filter(gender == "female") %>% nrow()
nonbinary <- data_q %>% filter(gender== "non-binary" )%>% nrow()
# by condition
gender_tib <- D1 %>%
filter(measure == "wtp") %>% # avoids duplicating data
group_by( condition) %>%
summarise(n=n(),
BMI = mean(BMI, na.rm = TRUE),
age = mean(age, na.rm = TRUE)) %>%
kable()xtabs(~ gender + condition , data = data_q,drop.unused.levels = TRUE)%>%
kable(digits = 2, caption = "A contingency table with observed gender values/counts across conditions") %>%
kable_styling(full_width = TRUE,
position = "left")| 1 | 2 | 3 | 4 | 5 | 6 | |
|---|---|---|---|---|---|---|
| female | 24 | 21 | 22 | 25 | 30 | 23 |
| male | 6 | 8 | 7 | 6 | 6 | 8 |
| non-binary | 2 | 1 | 1 | 1 | 0 | 0 |
chi_test <- chisq.test(data_q$gender, data_q$condition, simulate.p.value = TRUE)
chi_test$expected %>%
kable(digits = 1, caption = "A contingency table of expected gender values/counts across conditions")%>%
kable_styling(full_width = TRUE,
position = "left")| 1 | 2 | 3 | 4 | 5 | 6 | |
|---|---|---|---|---|---|---|
| female | 24.3 | 22.8 | 22.8 | 24.3 | 27.3 | 23.5 |
| male | 6.9 | 6.4 | 6.4 | 6.9 | 7.7 | 6.7 |
| non-binary | 0.8 | 0.8 | 0.8 | 0.8 | 0.9 | 0.8 |
# χ2 test requires all expected frequencies to be greater than five --> simulate.p.value avoids the problemAltogether 189 participants took part in the study, as is common for psychology studies, most of these were women (145 self-identified as female, 41 as male and 5 as non-binary). The gender split across the conditions was not problematic \(\chi^2\) = 5.28, p = 0.899 (simulated p values based on 2000 replicates, as expected values << 5).
# plot
## bar plot is best here
gender_pal <- c("#d62828", "#003049", "#9d69a3")
gender_cond_plot <-
ggplot(data = data_q)+
geom_bar(aes(x= condition, fill = gender))+
scale_fill_manual(values = gender_pal) +
scale_y_continuous(name="", limits=c(0, 40), breaks = seq(0,40, 5)) +
scale_x_discrete(name = "conditions", breaks = seq(1,6,1), limits = c("1", "2", "3", "4", "5", "6"))+
theme(panel.grid = element_blank())+
theme_minimal()
ggplotly(gender_cond_plot) Number of participants in each conditions
data_desc <- D1 %>%
filter(measure == "wtp")
age_lm <- lm(age ~ condition, data=data_desc)
age_sum <- broom::tidy(age_lm)
age_glance <- broom::glance(age_lm)Participants’ age ranged between 18 and 57. Most participants were very young and the mean age of participants was 20.3 (sd = 4.7). The age did not significantly differ across the six conditions F(181, 1) = 0.26, p = 0.608.
age_plot <- ggplot(data= data_desc, aes(x=condition, y = age, group = condition) )+
geom_boxplot(fill= "#6c757d", alpha = 0.5, outlier.shape = NA) +
geom_jitter(shape=16, position=position_jitter(0.2), aes(color = gender))+
scale_y_continuous(name="age", limits=c(18, 60), breaks = seq(0,60, 10)) +
scale_x_discrete(name = "conditions", limits = c("1", "2", "3", "4", "5", "6"))+
scale_color_manual(values = gender_pal) +
theme_minimal()
age_plot2 <- ggplot(data= data_desc, aes(x=condition, y = age, group = condition) )+
geom_boxplot(fill= "#6c757d", alpha = 0.5, outlier.shape = NA) +
geom_jitter(shape=16, position=position_jitter(0.2), aes(color = gender))+
scale_y_continuous(name="age", limits=c(15, 30), breaks = seq(15,30,5)) +
scale_x_discrete(name = "conditions", limits = c("1", "2", "3", "4", "5", "6"))+
scale_color_manual(values = gender_pal) +
theme_minimal()
ggplotly(age_plot)Participants’ age by condition and gender. Note range of the y-axis
ggplotly(age_plot2)Participants’ age by condition and gender. Note range of the y-axis
bmi_lm <- lm(BMI ~ condition, data=data_desc)
bmi_glance <- broom::glance(bmi_lm)The participants’ BMI ranged between 16.7 - 36.1. The average BMI was NA (sd =NA) and again, this did not differ across conditions F(186, 1) = 1.62, p = 0.205.
BMI_plot <- ggplot(aes(x=condition, y = BMI), data= data_desc)+
geom_boxplot(fill= "#6c757d", alpha = 0.5) +
geom_jitter(shape=16, position=position_jitter(0.2), aes(color = gender))+
scale_y_continuous(name="BMI", limits=c(18, 35), breaks = seq(0,35, 10)) +
scale_x_discrete(name = "conditions", limits = c("1", "2", "3", "4", "5", "6"))+
scale_color_manual(values = gender_pal) +
theme_minimal()
ggplotly(BMI_plot)Participants’ BMI by condition and gender
Visual inspection of participants’ baseline mood does not give raise to any concerns. **Do I need to run 16 anovas? Does not seem necessary…)
mood_base <- D2 %>%
filter(stage == 1)
palette <- c("#033f63","#28666e","#7c9885","#b5b682","#fedc97","#e5e1ee","#c17767","#210203","#eb9486","#82204a", "#0a2342","#2ca58d","#84bc9c","#fffdf7","#f46197","#fec601")
ggplot(data = mood_base, aes(x=condition, y = rating, group = condition, fill = feeling))+
geom_boxplot()+
facet_wrap("feeling")+
scale_fill_manual(values = palette) +
scale_y_continuous(name="", limits=c(0, 100), breaks = seq(0,100, 20)) +
theme(panel.grid = element_blank())+
theme_minimal()Participants’ baseline mood ratings
While the average alcohol consumption was realtively low 10.6 (sd = 8.9), there was a considerable differences among participants. The lowest alcohol consumption was only 1, equivalent to a half a pint of light beer (3.5%) once a week, the highest alcohol consumption was 70 units of alcohol a week, which is equivalent to over 20 pints of strong (~5% abv) beer every week. Interestingly, 45 (23.8%) participants consumed more than the recommended 14 units a week. We also asked participants to estimate how many units of beer they consumed. This was on average 4.8 (sd = 4.7), this however varied between 0 and 30. Thefigures below show participants’ weekly alcohol consumption, weekly beer consumption and the proportion of units of alcohol that came from beer. Neither the overall alcohol consumption nor beer consumption differed significantly across the conditions, F(2, 189) = 3.19, p = 0.069 and F(2, 189) = 0.06, p = 0.804, respectively.
# plot units alcohol consumption by condition
units_plot<-ggplot(aes(x=condition,
y = units,
group = condition),
data= data_q)+
geom_hline(aes(yintercept = 14, linetype = "14 units/week"),
color = "red")+
scale_linetype_manual(name = "max. recommended\nalcohol consumption", values = c(2, 2),
guide = guide_legend(override.aes = list(color = c("red")), title.theme = element_text(size = 8, face = "bold" ) ))+
geom_boxplot(fill= "#6c757d",
alpha = 0.5,
outlier.shape = NA) +
geom_jitter(shape=16,
position=position_jitter(0.2),
aes(color = gender))+
scale_y_continuous(name="alcohol consumption\n(units)",
limits=c(0, 75),
breaks = seq(0,75, 5)) +
scale_x_discrete(name = "conditions",
limits = c("1", "2", "3", "4", "5", "6"))+
scale_color_manual(values = gender_pal) +
theme_minimal()
ggplotly(units_plot)participants’ alcohol consumption by condition
# plot units beer consumption by condition
beer_plot<-ggplot(aes(x=condition,
y = units_beer,
group = condition),
data= data_q)+
geom_boxplot(fill= "#6c757d",
alpha = 0.5,
outlier.shape = NA) +
geom_jitter(shape=16,
position=position_jitter(0.2),
aes(color = gender))+
scale_y_continuous(name="beer consumption\n(units)",
limits=c(0, 35),
breaks = seq(0,35, 5)) +
scale_x_discrete(name = "conditions",
limits = c("1", "2", "3", "4", "5", "6"))+
scale_color_manual(values = gender_pal) +
theme_minimal()
ggplotly(beer_plot)participants’ beer consumption by condition
summary_beer <- data_q %>%
pivot_longer(cols = starts_with("units"), names_to = "al_id", values_to = "units") %>%
mutate(al_id = ifelse(al_id =="units", "all", "beer"))%>%
group_by(condition, al_id)%>%
summarize(units = mean(units))%>%
pivot_wider(names_from = al_id, values_from = units)%>%
mutate(perc= (beer/all)*100)%>%
pivot_longer(cols = c(all, beer), names_to = "alcohol", values_to = "units")
al_pal <- c( "#1d3557","#f4a261")
beer_prop <- ggplot(data=summary_beer,
aes(x=condition,
y=units,
fill = alcohol)) +
geom_bar(stat="identity",
position=position_dodge())+
geom_text(aes(label =paste0(round(perc,1),"%") ),
data = summary_beer %>%filter(alcohol == "beer"),
show.legend = FALSE,
size = 2,
nudge_x = 0.3,
nudge_y = 1,
color = "#1d3557" )+
scale_fill_manual(values = al_pal)+
scale_y_continuous(name="alcohol consumption\n(units)",
limits=c(0, 15),
breaks = seq(0,15, 5)) +
scale_x_discrete(name = "conditions",
limits = c("1", "2", "3", "4", "5", "6"))+
labs( fill = "alcohol")+
theme_minimal()
ggplotly(beer_prop)% of alcohol consumption that is beer by condition